Analyze selection data using soluble Ephrin-B2 or -B3¶
In [1]:
# This cell is tagged as `parameters` for `papermill` parameterization.
# Every name below defaults to None and is expected to be overridden with a real path.
# Input configs
altair_config = None
nipah_config = None
# Input files
entropy_file = None
func_scores_E2_file = None
binding_E2_file = None
func_scores_E3_file = None
binding_E3_file = None
# Output files (filtered data tables)
filtered_E2_binding_data = None
filtered_E3_binding_data = None
filtered_E2_binding_low_effect = None
filtered_E3_binding_low_effect = None
# Output images (paths the charts are saved to)
entry_binding_combined_corr_plot = None
entry_binding_combined_corr_plot_agg = None
E2_E3_correlation = None
E2_E3_correlation_site = None
combined_E2_E3_site_corr = None
binding_by_site_plot = None
entry_binding_corr_heatmap = None
binding_corr_heatmap = None
binding_region_boxplot_plot = None
binding_region_bubble_plot = None
max_binding_in_stalk = None
max_binding_in_contact = None
In [2]:
# Parameters
# -- configuration --
altair_config = "data/custom_analyses_data/theme.py"
nipah_config = "nipah_config.yaml"
# -- input tables --
entropy_file = "results/entropy/entropy.csv"
binding_E2_file = "results/receptor_affinity/averages/EFNB2_monomeric_mut_effect.csv"
binding_E3_file = "results/receptor_affinity/averages/EFNB3_dimeric_mut_effect.csv"
func_scores_E2_file = "results/func_effects/averages/CHO_EFNB2_low_func_effects.csv"
func_scores_E3_file = "results/func_effects/averages/CHO_EFNB3_low_func_effects.csv"
# -- output tables --
filtered_E2_binding_data = "results/filtered_data/E2_binding_filtered.csv"
filtered_E3_binding_data = "results/filtered_data/E3_binding_filtered.csv"
filtered_E2_binding_low_effect = "results/filtered_data/E2_binding_low_effect_filter.csv"
filtered_E3_binding_low_effect = "results/filtered_data/E3_binding_low_effect_filter.csv"
# -- output images --
entry_binding_combined_corr_plot = "results/images/entry_binding_combined_corr_plot.html"
entry_binding_combined_corr_plot_agg = "results/images/entry_binding_combined_corr_plot_agg.html"
entry_binding_corr_heatmap = "results/images/entry_binding_corr_heatmap.html"
binding_corr_heatmap = "results/images/binding_corr_heatmap.html"
E2_E3_correlation = "results/images/E2_E3_correlation.html"
E2_E3_correlation_site = "results/images/E2_E3_correlation_site.html"
combined_E2_E3_site_corr = "results/images/combined_E2_E3_site_corr.html"
binding_by_site_plot = "results/images/binding_by_site_plot.html"
binding_region_boxplot_plot = "results/images/binding_region_boxplot_plot.html"
binding_region_bubble_plot = "results/images/binding_region_bubble_plot.html"
max_binding_in_contact = "results/images/max_binding_in_contact.html"
max_binding_in_stalk = "results/images/max_binding_in_stalk.html"
In [3]:
import math
import os
import re
import altair as alt
import numpy as np
import pandas as pd
import scipy.stats
import yaml
In [4]:
# allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()

# Project root that all relative paths in this notebook are resolved against.
PROJECT_DIR = "/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/"

# os.getcwd() never ends with a path separator, so comparing it to the raw
# string above (which ends in "/") could never match. Compare resolved paths
# so the "already there" branch is actually reachable.
if os.path.realpath(os.getcwd()) == os.path.realpath(PROJECT_DIR):
    print("Already in correct directory")
else:
    os.chdir(PROJECT_DIR)
    print("Setup in correct directory")
Setup in correct directory
Hard-coded paths for running in interactive mode (without papermill/snakemake)¶
In [5]:
if nipah_config is None:
    # No parameters were injected, so fall back to hard-coded paths
    # (lets the notebook run without snakemake). Image output paths are
    # not set here.
    print('loading hard paths')

    # config files
    nipah_config = "nipah_config.yaml"
    altair_config = "data/custom_analyses_data/theme.py"

    # input files
    entropy_file = "results/entropy/entropy.csv"
    binding_E2_file = "results/receptor_affinity/averages/EFNB2_monomeric_mut_effect.csv"
    binding_E3_file = "results/receptor_affinity/averages/EFNB3_dimeric_mut_effect.csv"
    func_scores_E2_file = "results/func_effects/averages/CHO_EFNB2_low_func_effects.csv"
    func_scores_E3_file = "results/func_effects/averages/CHO_EFNB3_low_func_effects.csv"

    # output files
    filtered_E2_binding_data = "results/filtered_data/E2_binding_filtered.csv"
    filtered_E3_binding_data = "results/filtered_data/E3_binding_filtered.csv"
    filtered_E2_binding_low_effect = "results/filtered_data/E2_binding_low_effect_filter.csv"
    filtered_E3_binding_low_effect = "results/filtered_data/E3_binding_low_effect_filter.csv"
Run config files to setup altair theme and config variables¶
In [6]:
# Run the Altair theme script in the notebook namespace (only when a path is given).
if altair_config:
    with open(altair_config, 'r') as theme_script:
        exec(theme_script.read())

# Parse the analysis settings; later cells read thresholds and site lists from `config`.
with open(nipah_config) as config_handle:
    config = yaml.safe_load(config_handle)
Import the binding and entry data for EFNB2 and EFNB3¶
In [7]:
# Load the receptor-binding tables (e2, e3) and the cell-entry tables (e2_func, e3_func).
e2, e3 = (pd.read_csv(table_path) for table_path in (binding_E2_file, binding_E3_file))
e2_func, e3_func = (pd.read_csv(table_path) for table_path in (func_scores_E2_file, func_scores_E3_file))
Filter the data and save¶
In [8]:
def merge_func_binding_dfs(func,binding,name):
    """Merge cell-entry and binding tables for one receptor, filter them, and save the results.

    Parameters
    ----------
    func : pandas.DataFrame
        Cell-entry (functional effect) table; merged on site/mutant/wildtype.
    binding : pandas.DataFrame
        Receptor-binding table with 'Ephrin binding_*' columns.
    name : str
        'EFNB2' writes to the E2 output paths; any other value writes to the
        E3 output paths.

    Returns
    -------
    tuple of pandas.DataFrame
        (df_filter, df_low_effect_filter): mutants passing every entry and
        binding cutoff, and mutants whose entry effect is below
        config['min_func_effect_for_binding'] but whose entry measurement
        still passes the times-seen and std cutoffs. Both are independent
        copies, so callers can add columns without a SettingWithCopyWarning.

    Notes
    -----
    Reads thresholds from the global `config` and output paths from the
    global `filtered_*` variables. Rows with missing values fail the
    comparisons below and are therefore dropped from both outputs.
    """
    merged = pd.merge(
        binding,
        func,
        on=['site','mutant','wildtype'],
        suffixes=['_binding','_cell_entry'],
        validate='one_to_one',
        how='outer'
    ).round(3)
    merged = merged.rename(columns={'Ephrin binding_mean':'binding_mean','Ephrin binding_std':'binding_std','Ephrin binding_median':'binding_median'})
    # Only keep relevant columns
    df = merged[['site','wildtype','mutant','binding_median','binding_std','times_seen_binding','effect','effect_std','times_seen_cell_entry','frac_models']]

    # Exclusions shared by both outputs: '*' and '-' mutants and site 603
    # (presumably stop codons, gaps and a terminal site -- TODO confirm).
    valid_mutant = (
        (df['mutant'] != '*') &
        (df['mutant'] != '-') &
        (df['site'] != 603)
    )
    # The cell-entry measurement itself must be reliable in both outputs.
    entry_reliable = (
        (df['times_seen_cell_entry'] >= config['func_times_seen_cutoff']) &
        (df['effect_std'] <= config['func_std_cutoff'])
    )
    entry_high_enough = df['effect'] >= config['min_func_effect_for_binding']
    binding_reliable = (
        (df['times_seen_binding'] >= config['min_times_seen_binding']) &
        (df['binding_std'] <= config['max_binding_std']) &
        (df['frac_models'] >= config['frac_models'])
    )

    df_filter = df[valid_mutant & entry_high_enough & entry_reliable & binding_reliable].copy()
    # For pulling out low effect mutants for heatmaps later: below the entry
    # effect cutoff, but still with acceptable times_seen and std.
    df_low_effect_filter = df[
        valid_mutant &
        (df['effect'] < config['min_func_effect_for_binding']) &
        entry_reliable
    ].copy()

    # Save the filtered data to .csv
    if name == 'EFNB2':
        print(name)
        filtered_path = filtered_E2_binding_data
        low_effect_path = filtered_E2_binding_low_effect
    else:
        filtered_path = filtered_E3_binding_data
        low_effect_path = filtered_E3_binding_low_effect
    df_filter.to_csv(filtered_path,index=False)
    df_low_effect_filter.to_csv(low_effect_path,index=False)
    return df_filter,df_low_effect_filter
#Call filtering function
df_E2_filter,df_E2_filter_missing = merge_func_binding_dfs(e2_func,e2,'EFNB2')
df_E3_filter,df_E3_filter_missing = merge_func_binding_dfs(e3_func,e3,'EFNB3')

#Now that they are filtered, merge EFNB2 and EFNB3 side by side (one row per mutant)
df_binding_effect_merge = pd.merge(
    df_E2_filter,
    df_E3_filter,
    on=['site','wildtype','mutant'],
    suffixes=['_E2','_E3'],
    how='outer'
)

#display stats
display(df_binding_effect_merge.describe().round(3))

# Make a concat df of E2/E3 data for plotting later.
# `.assign` returns a new frame, so the filtered frames are rebound rather than
# written into in place (the in-place form can raise SettingWithCopyWarning
# because they are boolean-mask selections of a larger frame).
df_E2_filter = df_E2_filter.assign(selection='EFNB2')
df_E3_filter = df_E3_filter.assign(selection='EFNB3')
df_binding_effect_concat = pd.concat([df_E2_filter,df_E3_filter])
EFNB2
| site | binding_median_E2 | binding_std_E2 | times_seen_binding_E2 | effect_E2 | effect_std_E2 | times_seen_cell_entry_E2 | frac_models_E2 | binding_median_E3 | binding_std_E3 | times_seen_binding_E3 | effect_E3 | effect_std_E3 | times_seen_cell_entry_E3 | frac_models_E3 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 7212.000 | 6684.000 | 6684.000 | 6684.000 | 6684.000 | 6684.000 | 6684.000 | 6684.000 | 6519.000 | 6519.000 | 6519.000 | 6519.000 | 6519.000 | 6519.000 | 6519.0 |
| mean | 343.684 | -0.333 | 0.501 | 6.400 | -0.061 | 0.368 | 7.699 | 0.998 | -0.017 | 0.179 | 6.227 | -0.039 | 0.388 | 6.804 | 1.0 |
| std | 148.183 | 1.095 | 0.321 | 3.203 | 0.495 | 0.191 | 4.383 | 0.022 | 0.271 | 0.169 | 3.053 | 0.474 | 0.186 | 3.584 | 0.0 |
| min | 71.000 | -5.494 | 0.008 | 2.250 | -1.500 | 0.010 | 2.000 | 0.750 | -2.147 | 0.000 | 2.500 | -1.500 | 0.033 | 2.000 | 1.0 |
| 25% | 217.000 | -0.416 | 0.273 | 4.500 | -0.335 | 0.223 | 5.000 | 1.000 | -0.150 | 0.062 | 4.500 | -0.266 | 0.248 | 4.714 | 1.0 |
| 50% | 342.000 | -0.033 | 0.426 | 5.750 | 0.076 | 0.337 | 6.750 | 1.000 | -0.012 | 0.135 | 5.500 | 0.094 | 0.356 | 6.000 | 1.0 |
| 75% | 468.000 | 0.190 | 0.645 | 7.500 | 0.329 | 0.480 | 9.125 | 1.000 | 0.118 | 0.241 | 7.500 | 0.324 | 0.496 | 7.857 | 1.0 |
| max | 602.000 | 2.205 | 1.995 | 43.750 | 0.617 | 0.994 | 72.380 | 1.000 | 2.006 | 1.780 | 45.000 | 0.616 | 1.000 | 56.710 | 1.0 |
In [9]:
#What are the top 5 highest and lowest binding mutants for EFNB2 and EFNB3?
def find_highest_lowest(df,name):
    """Display the extreme binding mutants for one receptor.

    Shows the 5 lowest and 10 highest rows by 'binding_median', first over all
    of `df` and then restricted to rows with a positive 'effect'.

    Note: a later cell defines another function with this same name and a
    different signature, which replaces this one once that cell has run.
    """
    print(f'We are analyzing {name}\n')
    # (subset to rank, text appended to the printed headings)
    passes = [
        (df, ''),
        (df[df['effect'] > 0], ' with positive entry scores'),
    ]
    for subset, suffix in passes:
        print(f'These are the lowest binding mutants detected{suffix}:')
        display(subset.sort_values(by='binding_median').head(5))
        print(f'\nThese are the highest binding mutants detected{suffix}:\n')
        display(subset.sort_values(by='binding_median',ascending=False).head(10))
# Report extreme binders for each receptor in turn.
for receptor_df, receptor_name in ((df_E2_filter,'EFNB2'), (df_E3_filter,'EFNB3')):
    find_highest_lowest(receptor_df,receptor_name)
We are analyzing EFNB2 These are the lowest binding mutants detected:
| site | wildtype | mutant | binding_median | binding_std | times_seen_binding | effect | effect_std | times_seen_cell_entry | frac_models | selection | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 8285 | 501 | E | K | -5.494 | 0.928 | 24.50 | -0.712 | 0.606 | 32.00 | 1.0 | EFNB2 |
| 8898 | 532 | A | V | -5.180 | 1.140 | 16.25 | -0.292 | 0.312 | 17.88 | 1.0 | EFNB2 |
| 8072 | 490 | Q | K | -4.871 | 1.081 | 8.50 | -0.673 | 0.539 | 11.38 | 1.0 | EFNB2 |
| 8070 | 490 | Q | H | -4.864 | 0.862 | 8.25 | -0.273 | 0.668 | 10.88 | 1.0 | EFNB2 |
| 3188 | 236 | R | H | -4.853 | 0.883 | 22.00 | -1.217 | 0.335 | 29.38 | 1.0 | EFNB2 |
These are the highest binding mutants detected:
| site | wildtype | mutant | binding_median | binding_std | times_seen_binding | effect | effect_std | times_seen_cell_entry | frac_models | selection | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 5459 | 354 | S | R | 2.205 | 1.165 | 4.25 | -0.922 | 0.537 | 6.750 | 1.00 | EFNB2 |
| 9830 | 580 | I | S | 2.165 | 1.768 | 4.75 | -0.750 | 0.692 | 6.375 | 1.00 | EFNB2 |
| 9948 | 586 | N | T | 2.096 | 0.578 | 5.25 | -0.658 | 0.823 | 7.000 | 1.00 | EFNB2 |
| 655 | 104 | E | T | 2.094 | 0.755 | 4.50 | -0.348 | 0.671 | 3.500 | 1.00 | EFNB2 |
| 2696 | 211 | G | F | 2.060 | 0.549 | 4.75 | -0.753 | 0.426 | 5.125 | 1.00 | EFNB2 |
| 2621 | 207 | L | I | 2.042 | 1.531 | 4.00 | -0.816 | 0.764 | 3.750 | 1.00 | EFNB2 |
| 1298 | 138 | I | L | 1.968 | 0.917 | 5.00 | -1.114 | 0.244 | 4.750 | 1.00 | EFNB2 |
| 2051 | 178 | V | H | 1.938 | 0.730 | 3.25 | -0.639 | 0.903 | 3.625 | 1.00 | EFNB2 |
| 9307 | 553 | S | W | 1.923 | 0.287 | 10.00 | -0.396 | 0.429 | 13.250 | 1.00 | EFNB2 |
| 4768 | 318 | R | P | 1.922 | 1.433 | 3.00 | -0.961 | 0.764 | 4.000 | 0.75 | EFNB2 |
These are the lowest binding mutants detected with positive entry scores:
| site | wildtype | mutant | binding_median | binding_std | times_seen_binding | effect | effect_std | times_seen_cell_entry | frac_models | selection | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 9980 | 588 | I | L | -4.684 | 0.874 | 5.750 | 0.159 | 0.420 | 6.375 | 1.0 | EFNB2 |
| 9402 | 558 | A | V | -4.660 | 0.730 | 17.250 | 0.152 | 0.364 | 18.500 | 1.0 | EFNB2 |
| 9369 | 557 | N | D | -4.637 | 0.399 | 9.500 | 0.547 | 0.479 | 11.120 | 1.0 | EFNB2 |
| 8146 | 494 | P | E | -4.598 | 0.874 | 7.812 | 0.287 | 0.194 | 9.750 | 1.0 | EFNB2 |
| 8773 | 526 | L | H | -4.539 | 0.782 | 5.250 | 0.222 | 0.392 | 6.750 | 1.0 | EFNB2 |
These are the highest binding mutants detected with positive entry scores:
| site | wildtype | mutant | binding_median | binding_std | times_seen_binding | effect | effect_std | times_seen_cell_entry | frac_models | selection | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 9548 | 566 | F | H | 1.388 | 0.847 | 4.25 | 0.186 | 0.554 | 4.375 | 1.0 | EFNB2 |
| 7303 | 450 | Q | I | 1.330 | 1.194 | 5.00 | 0.316 | 0.399 | 6.000 | 1.0 | EFNB2 |
| 10034 | 591 | K | G | 1.266 | 0.156 | 5.75 | 0.027 | 0.442 | 6.125 | 1.0 | EFNB2 |
| 4540 | 306 | N | T | 1.239 | 0.491 | 4.00 | 0.110 | 0.357 | 7.625 | 1.0 | EFNB2 |
| 9513 | 564 | N | K | 1.196 | 0.488 | 7.25 | 0.228 | 0.442 | 8.000 | 1.0 | EFNB2 |
| 2034 | 177 | G | M | 1.193 | 0.618 | 6.50 | 0.380 | 0.534 | 5.375 | 1.0 | EFNB2 |
| 7184 | 444 | I | F | 1.162 | 0.350 | 5.00 | 0.294 | 0.314 | 6.250 | 1.0 | EFNB2 |
| 2519 | 202 | L | F | 1.156 | 0.358 | 3.75 | 0.096 | 0.344 | 4.375 | 1.0 | EFNB2 |
| 4594 | 309 | Y | P | 1.148 | 1.065 | 3.00 | 0.389 | 0.521 | 2.875 | 1.0 | EFNB2 |
| 9764 | 577 | L | I | 1.115 | 0.101 | 5.75 | 0.041 | 0.398 | 5.500 | 1.0 | EFNB2 |
We are analyzing EFNB3 These are the lowest binding mutants detected:
| site | wildtype | mutant | binding_median | binding_std | times_seen_binding | effect | effect_std | times_seen_cell_entry | frac_models | selection | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 9221 | 555 | D | K | -2.147 | 1.225 | 6.0 | 0.251 | 0.422 | 6.714 | 1.0 | EFNB3 |
| 9227 | 555 | D | R | -1.510 | 0.420 | 4.0 | 0.312 | 0.526 | 4.143 | 1.0 | EFNB3 |
| 8739 | 530 | Q | L | -1.447 | 0.477 | 3.5 | -1.231 | 0.862 | 5.857 | 1.0 | EFNB3 |
| 9761 | 583 | T | R | -1.410 | 0.446 | 5.5 | -0.870 | 0.485 | 6.286 | 1.0 | EFNB3 |
| 9755 | 583 | T | K | -1.247 | 0.113 | 5.0 | -0.667 | 0.644 | 7.000 | 1.0 | EFNB3 |
These are the highest binding mutants detected:
| site | wildtype | mutant | binding_median | binding_std | times_seen_binding | effect | effect_std | times_seen_cell_entry | frac_models | selection | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 9866 | 589 | R | G | 2.006 | 0.659 | 7.0 | -1.197 | 0.576 | 8.286 | 1.0 | EFNB3 |
| 9699 | 580 | I | M | 1.337 | 0.534 | 5.5 | -0.873 | 0.531 | 6.714 | 1.0 | EFNB3 |
| 3777 | 270 | V | Q | 1.329 | 1.028 | 5.0 | -0.920 | 0.651 | 5.429 | 1.0 | EFNB3 |
| 3719 | 267 | M | Q | 1.291 | 1.371 | 4.5 | -0.594 | 0.818 | 5.667 | 1.0 | EFNB3 |
| 8010 | 492 | Q | L | 1.261 | 0.067 | 5.5 | 0.548 | 0.183 | 5.143 | 1.0 | EFNB3 |
| 1150 | 132 | S | Y | 1.247 | 0.722 | 5.5 | -1.024 | 0.573 | 6.000 | 1.0 | EFNB3 |
| 9691 | 580 | I | C | 1.241 | 0.524 | 4.0 | -0.173 | 0.401 | 4.857 | 1.0 | EFNB3 |
| 9871 | 589 | R | M | 1.234 | 0.635 | 4.5 | -0.726 | 0.342 | 5.429 | 1.0 | EFNB3 |
| 9566 | 573 | W | M | 1.215 | 1.373 | 4.0 | -0.835 | 0.676 | 5.429 | 1.0 | EFNB3 |
| 9853 | 588 | I | P | 1.183 | 0.255 | 5.5 | -0.607 | 0.382 | 5.429 | 1.0 | EFNB3 |
These are the lowest binding mutants detected with positive entry scores:
| site | wildtype | mutant | binding_median | binding_std | times_seen_binding | effect | effect_std | times_seen_cell_entry | frac_models | selection | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 9221 | 555 | D | K | -2.147 | 1.225 | 6.0 | 0.251 | 0.422 | 6.714 | 1.0 | EFNB3 |
| 9227 | 555 | D | R | -1.510 | 0.420 | 4.0 | 0.312 | 0.526 | 4.143 | 1.0 | EFNB3 |
| 9323 | 560 | K | S | -1.022 | 0.438 | 5.5 | 0.019 | 0.446 | 5.833 | 1.0 | EFNB3 |
| 8747 | 530 | Q | W | -0.997 | 0.547 | 3.0 | 0.144 | 0.665 | 3.286 | 1.0 | EFNB3 |
| 2784 | 218 | T | S | -0.916 | 0.931 | 5.0 | 0.267 | 0.884 | 4.571 | 1.0 | EFNB3 |
These are the highest binding mutants detected with positive entry scores:
| site | wildtype | mutant | binding_median | binding_std | times_seen_binding | effect | effect_std | times_seen_cell_entry | frac_models | selection | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 8010 | 492 | Q | L | 1.261 | 0.067 | 5.5 | 0.548 | 0.183 | 5.143 | 1.0 | EFNB3 |
| 2637 | 211 | G | F | 1.169 | 0.289 | 5.0 | 0.436 | 0.572 | 5.714 | 1.0 | EFNB3 |
| 10031 | 598 | P | G | 1.154 | 1.559 | 4.5 | 0.422 | 0.515 | 5.429 | 1.0 | EFNB3 |
| 9192 | 553 | S | W | 0.994 | 0.435 | 9.5 | 0.099 | 0.406 | 9.857 | 1.0 | EFNB3 |
| 9745 | 582 | D | W | 0.976 | 0.338 | 6.5 | 0.111 | 0.310 | 7.143 | 1.0 | EFNB3 |
| 1663 | 161 | S | H | 0.923 | 0.294 | 4.0 | 0.077 | 0.465 | 4.143 | 1.0 | EFNB3 |
| 1818 | 169 | R | G | 0.859 | 0.347 | 7.5 | 0.096 | 0.473 | 7.571 | 1.0 | EFNB3 |
| 2977 | 228 | Y | R | 0.810 | 0.113 | 5.0 | 0.173 | 0.346 | 5.714 | 1.0 | EFNB3 |
| 1770 | 166 | L | W | 0.807 | 0.348 | 6.5 | 0.358 | 0.341 | 6.429 | 1.0 | EFNB3 |
| 1667 | 161 | S | M | 0.782 | 0.376 | 4.5 | 0.063 | 0.370 | 5.857 | 1.0 | EFNB3 |
In [10]:
#Compare E2 and E3 binders
def find_highest_lowest(df):
    """Display the mutants and sites whose EFNB2 and EFNB3 binding differ most.

    Side effect: adds a 'binding_diff' column (absolute E2 - E3 difference) to
    the frame passed in. This definition reuses the name of the earlier
    two-argument per-receptor helper and replaces it once this cell has run.
    """
    df['binding_diff'] = df['binding_median_E2'].sub(df['binding_median_E3']).abs()
    ranked_mutants = df.sort_values(by='binding_diff',ascending=False)
    print('These are the mutants with the biggest difference between EFNB2 and EFNB3:\n')
    display(ranked_mutants.head(5))

    # Per-site summary: each column is maximised independently, so the three
    # values in a row need not come from the same mutant.
    site_columns = ['binding_median_E2','binding_median_E3','binding_diff']
    agg_df = df.groupby('site')[site_columns].max().reset_index()
    ranked_sites = agg_df.sort_values(by='binding_diff',ascending=False)
    print('These are the sites with the biggest difference between EFNB2 and EFNB3:\n')
    display(ranked_sites.head(5))
find_highest_lowest(df_binding_effect_merge)
# The two-argument per-receptor form, e.g. ('EFNB3' on df_E3_filter), belongs to
# the earlier definition of this name and no longer matches the one-argument
# function defined in this cell, so it is intentionally not called here.
These are the mutants with the biggest difference between EFNB2 and EFNB3:
| site | wildtype | mutant | binding_median_E2 | binding_std_E2 | times_seen_binding_E2 | effect_E2 | effect_std_E2 | times_seen_cell_entry_E2 | frac_models_E2 | binding_median_E3 | binding_std_E3 | times_seen_binding_E3 | effect_E3 | effect_std_E3 | times_seen_cell_entry_E3 | frac_models_E3 | binding_diff | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 5729 | 530 | Q | F | -4.114 | 0.466 | 6.25 | 0.550 | 0.202 | 5.375 | 1.0 | 0.084 | 0.062 | 6.0 | 0.408 | 0.274 | 6.286 | 1.0 | 4.198 |
| 5258 | 490 | Q | W | -4.519 | 1.005 | 3.75 | 0.407 | 0.378 | 4.750 | 1.0 | -0.586 | 0.323 | 3.0 | -0.130 | 0.458 | 3.286 | 1.0 | 3.933 |
| 5259 | 490 | Q | Y | -4.089 | 0.900 | 5.50 | -1.085 | 0.400 | 6.375 | 1.0 | -0.461 | 0.574 | 5.5 | -0.280 | 0.797 | 5.714 | 1.0 | 3.628 |
| 5263 | 491 | S | G | -4.183 | 0.595 | 6.50 | 0.058 | 0.298 | 8.625 | 1.0 | -0.562 | 0.047 | 5.5 | -0.387 | 0.608 | 6.286 | 1.0 | 3.621 |
| 5283 | 492 | Q | K | -3.484 | 0.549 | 11.00 | 0.430 | 0.232 | 11.620 | 1.0 | 0.006 | 0.077 | 10.0 | 0.388 | 0.240 | 11.570 | 1.0 | 3.490 |
These are the sites with the biggest difference between EFNB2 and EFNB3:
| site | binding_median_E2 | binding_median_E3 | binding_diff | |
|---|---|---|---|---|
| 441 | 530 | 0.628 | 0.405 | 4.198 |
| 405 | 490 | -0.967 | 0.112 | 3.933 |
| 406 | 491 | -0.799 | 0.645 | 3.621 |
| 407 | 492 | 0.362 | 1.261 | 3.490 |
| 162 | 239 | -0.393 | -0.041 | 3.418 |
Make plots showing correlation between binding and entry for EFNB2 and EFNB3¶
In [11]:
def plot_corr_binding_entry_updated(df,flag):
    """Scatter plots of binding versus cell entry, one panel per selection.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with 'selection', 'site', 'effect' and
        'binding_median' columns (plus 'wildtype', 'mutant' and
        'times_seen_binding' for the per-mutant tooltip).
    flag : bool
        If true, plot per-site sums of binding and entry; otherwise plot every
        mutant individually.

    Returns
    -------
    Horizontally concatenated Altair chart with one panel per value of
    'selection'.
    """
    aggregate = bool(flag)

    # Hover highlight: keyed on site for the aggregated view, on (site, mutant) otherwise.
    hover = alt.selection_point(
        on="mouseover",
        empty=False,
        fields=["site"] if aggregate else ["site","mutant"],
        value=0
    )

    # Settings that differ between the two views.
    if aggregate:
        title_prefix = 'Summed '
        faded_opacity = 0.2
        size_selected, size_faded = 100, 50
        tooltip = ['site', 'binding_median','effect']
    else:
        title_prefix = ''
        faded_opacity = 0.1
        size_selected, size_faded = 50, 20
        tooltip = ['site', 'wildtype', 'mutant','binding_median','times_seen_binding','effect']

    panels = []
    for cell in list(df['selection'].unique()):
        plot_df = df[df['selection'] == cell]
        if aggregate:
            plot_df = plot_df.groupby('site')[['binding_median','effect']].sum().reset_index()
        chart = alt.Chart(plot_df).mark_point(stroke='black',filled=True).encode(
            x=alt.X('effect', title=f'{title_prefix}{cell} Cell Entry', axis=alt.Axis(grid=True)),
            y=alt.Y('binding_median', title=f'{title_prefix}{cell} Binding', axis=alt.Axis(grid=True)),
            opacity=alt.condition(hover, alt.value(1), alt.value(faded_opacity)),
            size=alt.condition(hover,alt.value(size_selected),alt.value(size_faded)),
            strokeWidth=alt.condition(hover,alt.value(1),alt.value(0)),
            color=alt.condition(hover,alt.value('orange'),alt.value('black')),
            tooltip=tooltip,
        ).add_params(hover)
        panels.append(chart)

    combined_chart = alt.hconcat(*panels,title=alt.Title('Correlation between binding and entry'))
    return combined_chart
# Per-mutant view
entry_binding_corr_plot = plot_corr_binding_entry_updated(df_binding_effect_concat,False)
entry_binding_corr_plot.display()
if entry_binding_combined_corr_plot is not None:
    entry_binding_corr_plot.save(entry_binding_combined_corr_plot)

# Per-site (summed) view; guard on this plot's own output path, not the per-mutant one
entry_binding_corr_plot_agg = plot_corr_binding_entry_updated(df_binding_effect_concat,True)
entry_binding_corr_plot_agg.display()
if entry_binding_combined_corr_plot_agg is not None:
    entry_binding_corr_plot_agg.save(entry_binding_combined_corr_plot_agg)
Same binding-versus-entry correlation as above, shown as a binned 2D heatmap of mutant counts¶
In [12]:
def plot_entry_binding_corr_heatmap(df):
    """Binned count heatmaps of binding versus cell entry, one panel per selection.

    Panels are concatenated horizontally and share their x, y and color scales.
    """
    def heatmap_for(cell):
        # Rows belonging to one selection, binned on both axes and colored by count.
        subset = df[df['selection'] == cell]
        return alt.Chart(subset,title=f'{cell}').mark_rect().encode(
            x=alt.X('effect',title='Cell Entry',axis=alt.Axis(values=[-2,-1,0,1])).bin(maxbins=60),
            y=alt.Y('binding_median',title='Binding',axis=alt.Axis(values=[-4,-2,0,2])).bin(maxbins=60),
            color=alt.Color('count()',title='Count').scale(scheme='greenblue'),
        )

    panels = [heatmap_for(cell) for cell in list(df['selection'].unique())]
    combined_chart = alt.hconcat(
        *panels,
        title=alt.Title('Correlation between binding and entry'),
    ).resolve_scale(y='shared',x='shared',color='shared')
    return combined_chart
entry_binding_corr_heat = plot_entry_binding_corr_heatmap(df_binding_effect_concat)
entry_binding_corr_heat.display()
# Guard on the path this chart is actually saved to.
if entry_binding_corr_heatmap is not None:
    entry_binding_corr_heat.save(entry_binding_corr_heatmap)
Calculate some stats on binding¶
In [13]:
def overall_stats(df,effect,name,cutoff=-0.25):
    """Print per-site summary statistics for one receptor's filtered data.

    Parameters
    ----------
    df : pandas.DataFrame
        Filtered table with 'site' and 'binding_median' columns.
    effect : str
        Column tested against `cutoff` when looking for uniformly low sites.
    name : str
        Dataset label used in the printed header.
    cutoff : float, optional
        A site is reported when every one of its mutants has `effect` strictly
        below this value. Defaults to -0.25, the previously hard-coded value;
        the printed messages describe such sites as "negative".

    Reads the contact-site list from the global config['contact_sites'].
    Returns None; all results are printed or displayed.
    """
    # Sites where all mutants fall below the cutoff
    filtered_df = df.groupby('site').filter(lambda group: (group[effect] < cutoff).all())
    unique = filtered_df['site'].unique()

    # Subset of those sites that are also contact sites
    unique_series = pd.Series(unique)
    common_elements = unique_series[unique_series.isin(config['contact_sites'])]

    print(f"The dataset we are analyzing is: {name}\n")
    print(f'Here are the contact sites that only have negative binding scores: {list(common_elements)}\n')
    print(f'There are {len(unique)} sites with all negative binding score mutants\n')
    print(f'These are the sites with all negative binding score mutants: {list(unique)}\n')

    # Sites ranked by their single best-binding mutant (per-site max of binding_median)
    site_max_df = df.groupby('site')['binding_median'].max().reset_index().sort_values(by='binding_median',ascending=False)
    print('These are the sites with the highest binding mutants:\n')
    display(site_max_df.head(5))

    total_sites = df['site'].unique().shape[0]
    print(f'The total number of sites are: {total_sites}')
# Summarise each receptor's filtered data using the binding column as the tested effect.
for receptor_df, receptor_name in ((df_E2_filter,'EFNB2'), (df_E3_filter,'EFNB3')):
    overall_stats(receptor_df,'binding_median',receptor_name)
The dataset we are analyzing is: EFNB2 Here are the contact sites that only have negative binding scores: [238, 239, 242, 389, 488, 490, 491, 501, 504, 505, 531, 532, 533, 557, 579, 581, 588] There are 43 sites with all negative binding score mutants These are the sites with all negative binding score mutants: [116, 220, 236, 238, 239, 242, 243, 248, 346, 351, 352, 389, 390, 398, 399, 400, 435, 438, 441, 460, 467, 486, 487, 488, 490, 491, 494, 495, 497, 501, 504, 505, 526, 531, 532, 533, 557, 579, 581, 584, 585, 588, 590] These are the sites with the highest binding mutants:
| site | binding_median | |
|---|---|---|
| 272 | 354 | 2.205 |
| 487 | 580 | 2.165 |
| 493 | 586 | 2.096 |
| 33 | 104 | 2.094 |
| 135 | 211 | 2.060 |
The total number of sites are: 510 The dataset we are analyzing is: EFNB3 Here are the contact sites that only have negative binding scores: [389, 488, 501, 531, 532] There are 15 sites with all negative binding score mutants These are the sites with all negative binding score mutants: [108, 140, 352, 389, 467, 486, 488, 494, 495, 497, 501, 510, 531, 532, 584] These are the sites with the highest binding mutants:
| site | binding_median | |
|---|---|---|
| 490 | 589 | 2.006 |
| 482 | 580 | 1.337 |
| 189 | 270 | 1.329 |
| 186 | 267 | 1.291 |
| 405 | 492 | 1.261 |
The total number of sites are: 504
Find sites with opposite effects on binding¶
In [14]:
#find sites that are different
def find_biggest_differences(df):
    """Display mutants with opposite-sign binding effects on the two receptors.

    First table: EFNB2 binding above 0.5 and EFNB3 binding below -0.5, sorted by
    EFNB2 binding (descending). Second table: the reverse, sorted by EFNB3
    binding (descending).
    """
    e2_binding = df['binding_median_E2']
    e3_binding = df['binding_median_E3']

    efnb2_good_efnb3_bad = df[(e2_binding > 0.5) & (e3_binding < -0.5)]
    display(efnb2_good_efnb3_bad.sort_values(by='binding_median_E2',ascending=False))

    efnb2_bad_efnb3_good = df[(e2_binding < -0.5) & (e3_binding > 0.5)]
    display(efnb2_bad_efnb3_good.sort_values(by='binding_median_E3',ascending=False))
# The merged frame already carries the 'binding_diff' column that the earlier
# E2-vs-E3 comparison cell added in place, so it appears in the tables shown here.
find_biggest_differences(df_binding_effect_merge)
| site | wildtype | mutant | binding_median_E2 | binding_std_E2 | times_seen_binding_E2 | effect_E2 | effect_std_E2 | times_seen_cell_entry_E2 | frac_models_E2 | binding_median_E3 | binding_std_E3 | times_seen_binding_E3 | effect_E3 | effect_std_E3 | times_seen_cell_entry_E3 | frac_models_E3 | binding_diff | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2555 | 303 | P | C | 1.223 | 1.577 | 5.00 | -0.745 | 0.590 | 5.000 | 1.0 | -0.566 | 0.098 | 4.0 | -0.527 | 0.594 | 4.857 | 1.0 | 1.789 |
| 5937 | 546 | L | H | 1.161 | 1.145 | 5.25 | -1.264 | 0.387 | 6.750 | 1.0 | -0.543 | 0.665 | 3.5 | -0.674 | 0.544 | 5.143 | 1.0 | 1.704 |
| 997 | 178 | V | T | 0.796 | 0.726 | 3.75 | -0.272 | 0.759 | 4.250 | 1.0 | -0.513 | 0.600 | 3.0 | -0.022 | 0.425 | 3.571 | 1.0 | 1.309 |
| 4692 | 447 | S | E | 0.767 | 1.742 | 4.25 | -1.116 | 0.812 | 4.750 | 1.0 | -0.557 | 0.393 | 4.0 | -0.078 | 0.325 | 4.571 | 1.0 | 1.324 |
| 1619 | 223 | A | M | 0.736 | 1.538 | 3.25 | -0.383 | 0.522 | 3.375 | 1.0 | -0.561 | 0.206 | 3.0 | -0.085 | 0.408 | 3.571 | 1.0 | 1.297 |
| 109 | 79 | N | S | 0.692 | 1.652 | 3.00 | -0.368 | 0.496 | 2.375 | 1.0 | -0.646 | 0.260 | 3.0 | -0.011 | 0.569 | 3.429 | 1.0 | 1.338 |
| 90 | 78 | D | I | 0.521 | 0.634 | 5.25 | -0.894 | 0.398 | 6.500 | 1.0 | -0.566 | 0.755 | 5.0 | -0.664 | 0.845 | 5.143 | 1.0 | 1.087 |
| site | wildtype | mutant | binding_median_E2 | binding_std_E2 | times_seen_binding_E2 | effect_E2 | effect_std_E2 | times_seen_cell_entry_E2 | frac_models_E2 | binding_median_E3 | binding_std_E3 | times_seen_binding_E3 | effect_E3 | effect_std_E3 | times_seen_cell_entry_E3 | frac_models_E3 | binding_diff | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 6512 | 588 | I | P | -2.042 | 0.840 | 5.25 | -0.276 | 0.300 | 4.750 | 1.0 | 1.183 | 0.255 | 5.5 | -0.607 | 0.382 | 5.429 | 1.0 | 3.225 |
| 3107 | 338 | R | G | -0.574 | 0.507 | 4.50 | 0.458 | 0.422 | 5.250 | 1.0 | 0.684 | 0.976 | 4.0 | 0.270 | 0.340 | 3.429 | 1.0 | 1.258 |
| 5260 | 491 | S | A | -0.799 | 0.537 | 5.50 | -0.441 | 0.806 | 6.250 | 1.0 | 0.645 | 0.010 | 6.5 | 0.232 | 0.358 | 6.143 | 1.0 | 1.444 |
| 5287 | 492 | Q | R | -2.826 | 0.611 | 19.25 | -0.003 | 0.272 | 23.120 | 1.0 | 0.644 | 0.054 | 20.0 | 0.549 | 0.119 | 20.570 | 1.0 | 3.470 |
| 4367 | 424 | P | A | -0.502 | 0.276 | 3.50 | 0.306 | 0.322 | 3.625 | 1.0 | 0.592 | 0.229 | 2.5 | 0.329 | 0.188 | 2.857 | 1.0 | 1.094 |
Find correlations between EFNB2 and EFNB3 binding¶
In [15]:
def plot_entry_binding_corr(df):
    """Binned count heatmap of EFNB2 binding (x) against EFNB3 binding (y)."""
    x_axis = alt.X('binding_median_E2',title='EFNB2 binding',axis=alt.Axis(values=[-5,0,2])).bin(maxbins=40)
    y_axis = alt.Y('binding_median_E3',title='EFNB3 binding',axis=alt.Axis(values=[-2,0,2])).bin(maxbins=40)
    fill = alt.Color('count()',title='Count').scale(scheme='greenblue')
    return (
        alt.Chart(df,title='Correlation Between Mutant Binding Scores')
        .mark_rect()
        .encode(x=x_axis, y=y_axis, color=fill)
    )
entry_binding_corr_heatmap_1 = plot_entry_binding_corr(df_binding_effect_merge)
entry_binding_corr_heatmap_1.display()
# Guard on the path this chart is actually saved to.
if binding_corr_heatmap is not None:
    entry_binding_corr_heatmap_1.save(binding_corr_heatmap)
In [16]:
def plot_affinity_solid(df):
    """Scatter plot of per-mutant EFNB2 versus EFNB3 binding with Pearson r in the subtitle.

    Rows containing any missing value are dropped first, so only mutants
    measured for both receptors are plotted and correlated.

    Returns the Altair scatter chart.
    """
    df = df.dropna()
    # Only the correlation coefficient of the linear fit is reported.
    fit = scipy.stats.linregress(df['binding_median_E2'], df['binding_median_E3'])
    r_value = float(fit.rvalue)

    chart = alt.Chart(df,title=alt.Title('Correlation between Mutant Binding Scores',subtitle=f'r={r_value:.2f}')).mark_point(color='black',size=30, opacity=0.2,filled=True).encode(
        x=alt.X('binding_median_E2', title=('EFNB2 Binding')),
        y=alt.Y('binding_median_E3', title=('EFNB3 Binding')),
        tooltip=['site', 'wildtype','mutant','binding_median_E2','binding_median_E3','effect_E2','effect_E3'],
    )
    return chart
E2_E3_corr = plot_affinity_solid(df_binding_effect_merge)
E2_E3_corr.display()
# Guard on the path this chart is actually saved to.
if E2_E3_correlation is not None:
    E2_E3_corr.save(E2_E3_correlation)
Plot correlations between summary statistics for each site¶
In [17]:
def plot_affinity_solid_mean(df):
    """Scatter plot of per-site median EFNB2 versus EFNB3 binding with Pearson r in the subtitle.

    Despite the function name, the per-site summary is the median (not the
    mean) of each numeric column; 'wildtype' takes the first value at the site.
    Rows containing any missing value are dropped before aggregating.

    Returns the Altair scatter chart.
    """
    df = df.dropna()
    site_medians = df.groupby('site').agg({
        'effect_E2': 'median',
        'effect_E3': 'median',
        'binding_median_E2': 'median',
        'binding_median_E3': 'median',
        'wildtype': 'first'
    }).reset_index()

    # Only the correlation coefficient of the linear fit is reported.
    fit = scipy.stats.linregress(site_medians['binding_median_E2'], site_medians['binding_median_E3'])
    r_value = float(fit.rvalue)

    chart = alt.Chart(site_medians,title=alt.Title('Correlation between Aggregate Mutant Binding Scores',subtitle=f'r={r_value:.2f}')).mark_point(size=50, opacity=0.3).encode(
        x=alt.X('binding_median_E2', title=('Median EFNB2 Binding'), axis=alt.Axis(tickCount=3)),
        y=alt.Y('binding_median_E3', title=('Median EFNB3 Binding'), axis=alt.Axis(tickCount=3)),
        tooltip=['site', 'wildtype','binding_median_E2','binding_median_E3','effect_E2','effect_E3'],
    )
    return chart
E2_E3_site_corr = plot_affinity_solid_mean(df_binding_effect_merge)
E2_E3_site_corr.display()
# Each save is guarded by its own output path.
if E2_E3_correlation_site is not None:
    E2_E3_site_corr.save(E2_E3_correlation_site)
# Side-by-side figure: per-site chart next to the per-mutant chart from the previous cell.
if combined_E2_E3_site_corr is not None:
    (E2_E3_site_corr | E2_E3_corr).save(combined_E2_E3_site_corr)
Make plot showing the maximum binding score at each site¶
In [18]:
def plot_affinity_by_site_median(df):
    """Plot, for each receptor, the highest binding score observed at every site.

    Despite the function name, the per-site aggregate is the *maximum* of the
    ``binding_median_E2`` / ``binding_median_E3`` column; sites whose maximum
    is negative are left out. One panel is drawn per column and the panels
    are stacked vertically under the title 'Max Binding by Site'. Hovering
    highlights the nearest site via a point selection on ``site``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``site``, ``binding_median_E2`` and ``binding_median_E3``.

    Returns
    -------
    alt.VConcatChart
        The two stacked panels.
    """
    hover_site = alt.selection_point(
        on="mouseover",
        nearest=True,
        empty=False,
        fields=["site"],
        value=0,
    )

    # Column to plot -> y-axis title for that panel.
    axis_titles = {
        'binding_median_E2': 'EFNB2 Binding',
        'binding_median_E3': 'EFNB3 Binding',
    }

    panels = []
    for column, axis_title in axis_titles.items():
        site_max = df.groupby('site')[column].max().reset_index()
        # Keep only sites where the best score is non-negative.
        site_max = site_max.loc[site_max[column] >= 0]

        points = alt.Chart(site_max).mark_point(stroke='black', filled=True, size=50)
        panel = (
            points.encode(
                x=alt.X(
                    'site',
                    title=('Site'),
                    axis=alt.Axis(grid=True, tickCount=4),
                    scale=alt.Scale(domain=[70, 602]),
                ),
                y=alt.Y(column, title=(axis_title), axis=alt.Axis(grid=True, tickCount=3)),
                tooltip=['site'],
                color=alt.condition(hover_site, alt.value('orange'), alt.value('black')),
                opacity=alt.condition(hover_site, alt.value(1), alt.value(0.5)),
                strokeWidth=alt.condition(hover_site, alt.value(1), alt.value(0)),
            )
            .properties(height=150, width=500)
            .add_params(hover_site)
        )
        panels.append(panel)

    return alt.vconcat(*panels, spacing=1, title='Max Binding by Site')
# Build and show the per-site maximum binding panels.
binding_by_site = plot_affinity_by_site_median(df_binding_effect_merge)
binding_by_site.display()
# Guard on the path actually written; the original tested the unrelated
# `entry_binding_combined_corr_plot` parameter.
if binding_by_site_plot is not None:
    binding_by_site.save(binding_by_site_plot)
In [19]:
def plot_affinity_by_contact_site(df,sites_to_show,title_text):
    """Plot the per-site maximum ``binding_median`` for a chosen set of sites.

    The table is restricted to rows whose ``site`` is in ``sites_to_show``.
    For every distinct value of ``selection`` in the *unfiltered* table, one
    panel is drawn showing the maximum ``binding_median`` per site; panels
    are stacked vertically. Sites appear on an ordinal x-axis in the order
    they first occur in the filtered table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``site``, ``selection`` and ``binding_median``.
    sites_to_show : collection
        Site values to keep.
    title_text : str
        Title for the combined figure.

    Returns
    -------
    alt.VConcatChart
        One panel per selection.
    """
    hover_site = alt.selection_point(
        on="mouseover",
        nearest=True,
        empty=False,
        fields=["site"],
        value=0,
    )

    shown = df[df['site'].isin(sites_to_show)]
    site_order = list(shown['site'].unique())

    def _panel(selection):
        # Highest binding score per site within this selection.
        per_selection = shown[shown['selection'] == selection]
        site_max = per_selection.groupby('site')['binding_median'].max().reset_index()
        return alt.Chart(site_max).mark_point(size=100).encode(
            x=alt.X(
                'site:O',
                sort=site_order,
                title=('Site'),
                axis=alt.Axis(grid=True, labelAngle=-90),
                scale=alt.Scale(domain=site_order),
            ),
            y=alt.Y('binding_median', title=(f'{selection}'), axis=alt.Axis(grid=True)),
            tooltip=['site'],
            color=alt.condition(hover_site, alt.value('orange'), alt.value('black')),
            strokeWidth=alt.condition(hover_site, alt.value(2), alt.value(0)),
        ).add_params(hover_site)

    panels = [_panel(selection) for selection in df['selection'].unique()]
    return alt.vconcat(*panels, spacing=1, title=title_text)
# Maximum binding at the receptor-contact sites listed in the config.
contact_binding_by_site = plot_affinity_by_contact_site(df_binding_effect_concat,config['contact_sites'],'Max Binding in Contact')
contact_binding_by_site.display()
# Guard each save on its own output path; the original tested the unrelated
# `entry_binding_combined_corr_plot` parameter.
if max_binding_in_contact is not None:
    contact_binding_by_site.save(max_binding_in_contact)

# Maximum binding across stalk sites 96-147 inclusive. range() excludes its
# stop value, so the stop must be 148; the original range(96, 147) dropped
# site 147, which is not part of the neck either (the neck starts at 148 in
# this notebook's region tables).
contact_binding_by_site_stalk = plot_affinity_by_contact_site(df_binding_effect_concat,list(range(96, 148)),"Max Binding in Stalk")
contact_binding_by_site_stalk.display()
if max_binding_in_stalk is not None:
    contact_binding_by_site_stalk.save(max_binding_in_stalk)
Make bubble plots for binding in different areas of receptor pocket¶
In [20]:
def make_boxplot_binding_region(df,title):
    """Display a jittered point plot of per-site median binding by contact category.

    Despite the name, this draws individual points (``mark_point``) with a
    random horizontal offset, not a box plot. ``binding_median`` is
    summarized to one median per site, and each site is then plotted once
    for every category whose site list contains it, so a site can appear in
    several columns.

    NOTE: a later cell defines another function with this same name and a
    different signature, which shadows this one; this version must be called
    before that cell runs.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``site`` and ``binding_median``.
    title : str
        Receptor label inserted into the y-axis title.

    Returns
    -------
    The result of ``chart.display()``; the chart is rendered as a side effect.
    """
    category_sites = {
        'Hydrophobic': config['hydrophobic'],
        'Salt Bridges': config['salt_bridges'],
        'Hydrogen Bonds': config['h_bond_total'],
        'Contact': config['contact_sites'],
        # Sites 71-602 inclusive. range() excludes its stop, so 603 is needed
        # to keep the final site (the original range(71,602) dropped 602).
        'Overall': list(range(71,603)),
    }
    custom_order = ['Hydrophobic','Salt Bridges','Hydrogen Bonds','Contact','Overall']

    site_medians = df.groupby('site')[['binding_median']].median().reset_index()

    # One frame per category holding the sites that belong to it, built with
    # vectorized selection instead of appending dicts row by row.
    per_category = [
        site_medians.loc[site_medians['site'].isin(sites)]
        .rename(columns={'binding_median': 'effect'})
        .assign(barrel=category)
        for category, sites in category_sites.items()
    ]
    agg_means_df = pd.concat(per_category, ignore_index=True)[['barrel', 'effect', 'site']]

    chart = alt.Chart(agg_means_df).mark_point(size=50,opacity=0.4).encode(
        x=alt.X('barrel:O', sort=custom_order,title=None,axis=alt.Axis(labelAngle=-90)),
        y=alt.Y('effect',title=f'Median {title} Binding',axis=alt.Axis(grid=True,tickCount=4)),
        xOffset='random:Q',
        tooltip=['barrel', 'effect','site'],
    ).transform_calculate(
        # Horizontal jitter computed in the Vega expression language so that
        # overlapping points spread out within each category column.
        random="sqrt(-1*log(random()))*cos(2*PI*random())"
    )
    return chart.display()
# Render the jittered category plot once per receptor, EFNB2 first.
for receptor_df, receptor_label in ((df_E2_filter, 'EFNB2'), (df_E3_filter, 'EFNB3')):
    make_boxplot_binding_region(receptor_df, receptor_label)
Make boxplot of binding scores by region¶
In [21]:
def make_boxplot_binding_region(df):
    """Box plots of ``binding_median`` by protein region, one panel per selection.

    For every distinct ``selection`` value, rows are assigned to each region
    whose site list contains their ``site`` (a row can fall in several
    regions, e.g. its structural region, 'Receptor Contact' and 'Total'),
    and a min-max box plot is drawn per region. Panels are placed side by
    side with shared x and y scales.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``selection``, ``site`` and ``binding_median``.

    Returns
    -------
    alt.HConcatChart
        One box-plot panel per selection.
    """
    # Region boundaries are inclusive residue numbers: Stalk 96-147,
    # Neck 148-165, Linker 166-177, Head 178-602, Total 71-602. range()
    # excludes its stop value, so each stop is last_site + 1. The original
    # stops (147, 165, 177, 602) left those four sites out of every region.
    region_sites = {
        "Stalk": list(range(96, 148)),
        "Neck": list(range(148, 166)),
        "Linker": list(range(166, 178)),
        "Head": list(range(178, 603)),
        'Receptor Contact': config['contact_sites'],
        "Total": list(range(71, 603)),
    }
    custom_order = ["Stalk", "Neck", "Linker", "Head", "Receptor Contact", "Total"]

    panels = []
    for selection in df['selection'].unique():
        selection_df = df[df["selection"] == selection]

        # Stack the rows belonging to each region, tagging them with the
        # region name (vectorized replacement for a row-by-row append loop).
        per_region = [
            selection_df.loc[selection_df["site"].isin(sites), ["binding_median", "site"]]
            .assign(region=region)
            for region, sites in region_sites.items()
        ]
        region_df = pd.concat(per_region, ignore_index=True)[["region", "binding_median", "site"]]

        chart = (
            alt.Chart(region_df, title=f"{selection}")
            .mark_boxplot(color="darkgray", extent="min-max", opacity=1)
            .encode(
                x=alt.X(
                    "region:O",
                    sort=custom_order,
                    title="RBP Region",
                    axis=alt.Axis(labelAngle=-90),
                ),
                y=alt.Y(
                    "binding_median",
                    title="Binding",
                    axis=alt.Axis(grid=True, tickCount=4),
                ),
                tooltip=["region", "binding_median", "site"],
            ).properties(width=config['width'],height=config['height'])
        )
        panels.append(chart)

    combined_effect_chart = alt.hconcat(*panels).resolve_scale(
        y="shared", x="shared", color="independent"
    )
    return combined_effect_chart
# Build and show the per-region binding box plots.
entry_region_boxplot = make_boxplot_binding_region(df_binding_effect_concat)
entry_region_boxplot.display()
# Guard on the path actually written; the original tested the unrelated
# `entry_binding_combined_corr_plot` parameter.
if binding_region_boxplot_plot is not None:
    entry_region_boxplot.save(binding_region_boxplot_plot)
In [22]:
def make_bubble_binding_region(df):
    """Jittered point plots of ``binding_median`` by protein region, one panel per selection.

    For every distinct ``selection`` value, rows are assigned to each region
    whose site list contains their ``site`` (a row can fall in several
    regions), and each row is drawn as a point with a random horizontal
    offset inside its region column. Hovering a point highlights it through
    a point selection on ``site`` and ``mutant``. Panels are placed side by
    side with shared x and y scales.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``selection``, ``site``, ``mutant`` and ``binding_median``.

    Returns
    -------
    alt.HConcatChart
        One panel per selection.
    """
    # Region boundaries are inclusive residue numbers: Stalk 96-147,
    # Neck 148-165, Linker 166-177, Head 178-602, Total 71-602. range()
    # excludes its stop value, so each stop is last_site + 1. The original
    # stops (147, 165, 177, 602) left those four sites out of every region.
    region_sites = {
        "Stalk": list(range(96, 148)),
        "Neck": list(range(148, 166)),
        "Linker": list(range(166, 178)),
        "Head": list(range(178, 603)),
        'Receptor Contact': config['contact_sites'],
        "Total": list(range(71, 603)),
    }
    custom_order = ["Stalk", "Neck", "Linker", "Head", "Receptor Contact", "Total"]

    panels = []
    for selection in df['selection'].unique():
        selection_df = df[df["selection"] == selection]

        # Stack the rows belonging to each region, tagging them with the
        # region name (vectorized replacement for a row-by-row append loop).
        per_region = [
            selection_df.loc[selection_df["site"].isin(sites), ["binding_median", "site", "mutant"]]
            .assign(region=region)
            for region, sites in region_sites.items()
        ]
        region_df = pd.concat(per_region, ignore_index=True)[
            ["region", "binding_median", "site", "mutant"]
        ]

        # A fresh hover selection is created for every panel, as in the
        # original code.
        variant_selector = alt.selection_point(
            on="mouseover", empty=False, fields=["site",'mutant'], value=1
        )
        chart = (
            alt.Chart(region_df, title=f"{selection}")
            .mark_point(opacity=0.3, stroke='black')
            .encode(
                x=alt.X(
                    "region:O",
                    sort=custom_order,
                    title="RBP Region",
                    axis=alt.Axis(labelAngle=-90),
                ),
                y=alt.Y(
                    "binding_median",
                    title="Binding",
                    axis=alt.Axis(grid=True, tickCount=4),
                ),
                xOffset="random:Q",
                tooltip=["region", "binding_median", "site","mutant"],
                color=alt.condition(
                    variant_selector, alt.value("orange"), alt.value("black")
                ),
                opacity=alt.condition(variant_selector, alt.value(1), alt.value(0.1)),
                strokeWidth=alt.condition(variant_selector,alt.value(2),alt.value(0)),
                size=alt.condition(variant_selector,alt.value(50),alt.value(15)),
            ).transform_calculate(
                # Horizontal jitter computed in the Vega expression language.
                random="sqrt(-1*log(random()))*cos(2*PI*random())"
            ).properties(width=config['width'],height=config['height'])
        ).add_params(variant_selector)
        panels.append(chart)

    # NOTE(review): the selector from the last loop iteration is also added
    # to the concatenated chart, exactly as in the original. Confirm whether
    # a single selector shared by all panels was intended instead.
    combined_effect_chart = alt.hconcat(*panels).resolve_scale(
        y="shared", x="shared", color="independent"
    ).add_params(variant_selector)
    return combined_effect_chart
# Build and show the per-region jittered binding plots.
entry_region_bubble = make_bubble_binding_region(df_binding_effect_concat)
entry_region_bubble.display()
# Guard on the path actually written; the original tested the unrelated
# `entry_binding_combined_corr_plot` parameter.
if binding_region_bubble_plot is not None:
    entry_region_bubble.save(binding_region_bubble_plot)
In [ ]: